{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.grid_search import GridSearchCV\n",
    "from sklearn.cross_validation import ShuffleSplit\n",
    "from sklearn.cross_validation import cross_val_score\n",
    "import dautil as dl\n",
    "from sklearn.ensemble import ExtraTreesRegressor\n",
    "from joblib import Memory\n",
    "import numpy as np\n",
    "from IPython.display import HTML"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Cache expensive results on disk in the current directory.\n",
    "# The path is passed positionally because joblib renamed this argument\n",
    "# from `cachedir` to `location`; the first positional slot works for both.\n",
    "memory = Memory('.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "@memory.cache\n",
    "def get_scores():\n",
    "    \"\"\"Run a grid search on many random subsamples and collect its CV scores.\n",
    "\n",
    "    Features are the wind speed, temperature and pressure of one row of the\n",
    "    weather data; the target is the temperature of the following row.\n",
    "\n",
    "    Returns:\n",
    "        dict with four lists:\n",
    "        'r2'   -- per-fold validation scores of every grid candidate,\n",
    "                  pooled over all outer iterations\n",
    "        'best' -- `best_score_` of the grid search, one per outer iteration\n",
    "        'mean' -- mean validation score of every grid candidate,\n",
    "                  pooled over all outer iterations\n",
    "        'std'  -- standard deviation of the fold scores, one per outer\n",
    "                  iteration\n",
    "    \"\"\"\n",
    "    df = dl.data.Weather.load()[['WIND_SPEED', 'TEMP', 'PRESSURE']].dropna()\n",
    "    X = df.values[:-1]\n",
    "    # Use a plain array so the integer split indices below are always\n",
    "    # positional; indexing a Series with them may be resolved by label.\n",
    "    y = df['TEMP'].values[1:]\n",
    "\n",
    "    # A node needs at least two samples to be split, and newer scikit-learn\n",
    "    # releases reject min_samples_split=1, so 2 is the smallest valid value.\n",
    "    params = {'min_samples_split': [2, 3],\n",
    "              'min_samples_leaf': [3, 4]}\n",
    "\n",
    "    gscv = GridSearchCV(ExtraTreesRegressor(bootstrap=True,\n",
    "                                            random_state=37),\n",
    "                        param_grid=params, n_jobs=-1, cv=5)\n",
    "    cv_outer = ShuffleSplit(len(X), n_iter=500,\n",
    "                            test_size=0.3, random_state=55)\n",
    "    r2 = []\n",
    "    best = []\n",
    "    means = []\n",
    "    stds = []\n",
    "\n",
    "    for train_indices, test_indices in cv_outer:\n",
    "        gscv.fit(X[train_indices], y[train_indices])\n",
    "        # NOTE(review): the held-out predictions are discarded, so only the\n",
    "        # inner cross-validation scores end up in the result.\n",
    "        gscv.predict(X[test_indices])\n",
    "        grid_scores = dl.collect.flatten([g.cv_validation_scores\n",
    "                                          for g in gscv.grid_scores_])\n",
    "        r2.extend(grid_scores)\n",
    "        means.extend(dl.collect.flatten([g.mean_validation_score\n",
    "                                         for g in gscv.grid_scores_]))\n",
    "        stds.append(np.std(grid_scores))\n",
    "        best.append(gscv.best_score_)\n",
    "\n",
    "    return {'r2': r2, 'best': best, 'mean': means, 'std': stds}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "scores = get_scores()\n",
    "# Convert each list of scores into an array for the plots below.\n",
    "r2, avgs, stds, best = (np.array(scores[key])\n",
    "                        for key in ('r2', 'mean', 'std', 'best'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "# Apply dautil's plot styling (presumably seaborn-like, judging by the name).\n",
    "dl.options.mimic_seaborn()\n",
    "# Context named 'nested_cv'; it is also handed to the Subplotter later on.\n",
    "context = dl.nb.Context('nested_cv')\n",
    "# Widgets bound to that context. The 2, 2 arguments match those given to the\n",
    "# Subplotter (presumably rows and columns of the grid -- TODO confirm).\n",
    "dl.nb.RcWidget(context)\n",
    "dl.nb.LabelWidget(2, 2, context)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "sp = dl.plotting.Subplotter(2, 2, context)\n",
    "\n",
    "# One histogram per panel, in this order: all fold scores, best scores,\n",
    "# candidate means, per-iteration standard deviations.\n",
    "for i, data in enumerate([r2, best, avgs, stds]):\n",
    "    # The first panel is the subplotter's current axes; the rest are\n",
    "    # obtained by advancing it.\n",
    "    ax = sp.ax if i == 0 else sp.next_ax()\n",
    "    dl.plotting.hist_norm_pdf(ax, data)\n",
    "    sp.label()\n",
    "\n",
    "HTML(sp.exit())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
